Loading libraries

dyn.load("/home/moyra.lawrence/miniconda3/envs/monocle3/lib/libgeos_c.so.1")
library(Seurat)
## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.11.0-CAPI-1.17.0
## and GEOS at installation 3.8.0-CAPI-1.13.1differ
library(patchwork)
library(ggplot2)
library(gridExtra)
library(openxlsx)
library(svglite)
library(dplyr)
library(gt)
library(tidyverse)
library(stringi)
library(SeuratObject)
library(biomaRt)
library(stringr)

Load data

load(file='merge_filtered3.rda')  

Checking merge

head(colnames(merge.filtered))
## [1] "Empty_puro_Empty_hygro_AAACCCACAAACTAGA.1"
## [2] "Empty_puro_Empty_hygro_AAACCCACAATAACGA.1"
## [3] "Empty_puro_Empty_hygro_AAACCCAGTTTCGCTC.1"
## [4] "Empty_puro_Empty_hygro_AAACCCATCAAACTGC.1"
## [5] "Empty_puro_Empty_hygro_AAACCCATCTGAGAAA.1"
## [6] "Empty_puro_Empty_hygro_AAACGAAAGTAAGAGG.1"
tail(colnames(merge.filtered))
## [1] "ShenTBLC_TTTGTCAGTAAATGAC.1" "ShenTBLC_TTTGTCAGTCCCGACA.1"
## [3] "ShenTBLC_TTTGTCAGTGCCTTGG.1" "ShenTBLC_TTTGTCAGTGGAAAGA.1"
## [5] "ShenTBLC_TTTGTCAGTTCCAACA.1" "ShenTBLC_TTTGTCATCCTTTACA.1"
unique(sapply(X = strsplit(colnames(merge.filtered), split = "_"), FUN = "[", 1))
##  [1] "Empty"      "Embryo"     "Dppa2"      "Hu"         "XuESC1"    
##  [6] "XuESC2"     "XuEPS1"     "XuEPS2"     "XuEPSTPS"   "Xu2CTPS"   
## [11] "Cossec"     "Yu"         "ShenNC"     "ShenSnrpd2" "ShenPSC"   
## [16] "ShenTBLC"
table(merge.filtered$orig.ident)
## 
##         Cossec Dppa2_MERVL_TT           EVEV             Hu           ML11 
##           2458           2373           2687           9795            317 
##        ShenNCS       ShenPSCS    ShenSnrpd2S      ShenTBLCS        Xu2CTPS 
##           5332           4139           4152           4534           7942 
##         XuEPS1         XuEPS2       XuEPSTPS         XuESC1         XuESC2 
##          10061           8889          10337           9430           2986 
##            YuC            YuT 
##           2580           4158
table(merge.filtered$in.vivo)
## 
##     16cell      4cell      8cell        BXC C57twocell early2cell earlyblast 
##         58         14         47         13          8          8         43 
## fibroblast  late2cell  lateblast   mid2cell   midblast     zygote 
##         10         10         30         12         60          4

Confirming filtering

vp1 <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA")) +  theme(legend.position = 'none')
vp2 <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA")) +  theme(legend.position = 'none')
vp3 <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt")) + theme(legend.position = 'none')
vp1.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nFeature_RNA"),log=TRUE)+  theme(legend.position = 'none')
vp2.log <- VlnPlot(merge.filtered, pt.size=0, features = c("nCount_RNA"),log=TRUE)+  theme(legend.position = 'none')
vp3.log <- VlnPlot(merge.filtered, pt.size=0, features = c("percent.mt"),log=TRUE) +  theme(legend.position = 'none')
vp1
vp2
vp3
grid.arrange(vp1.log,vp2.log,vp3.log, nrow=1)

Splitting dataset

merge.list <- SplitObject(merge.filtered, split.by = "orig.ident")
merge.list <- lapply(X = merge.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, verbose = FALSE)
})

merge.list <- lapply(X = merge.list, FUN = function(x) {
    features <- VariableFeatures(x)
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})

Integration anchors

anchors <- FindIntegrationAnchors(object.list = merge.list, reference = c(1, 2, 5, 6, 12, 14, 16), 
            reduction = "rpca",
            dims = 1:50 )
anchors

##Integration

totipotency <- IntegrateData(anchorset = anchors, dims = 1:50, k.weight = 80)
## Building integrated reference
## Merging dataset 2 into 14
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 6 into 5
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 16 into 14 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 1 into 14 2 16
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 5 6 into 14 2 16 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 12 into 14 2 16 1 5 6
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 3 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 4 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 7 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 8 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 9 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 10 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 11 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 13 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 15 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## 
## Integrating dataset 17 with reference dataset
## Finding integration vectors
## Finding integration vector weights
## Integrating data

Run the standard workflow for visualisation and clustering

Identify variable genes 1

DefaultAssay(totipotency) <- "RNA"
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(totipotency), 10)
plot1 <- VariableFeaturePlot(totipotency)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Scaling

all.genes <- rownames(totipotency)
totipotency <- ScaleData(totipotency)

#Omitted Normalization as this is an integrated object

PCA combined dataset

totipotency <- RunPCA(totipotency, npcs=100, features = rownames(totipotency))

print(totipotency[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(totipotency, dims = 1:6, reduction = "pca")
DimPlot(totipotency, reduction = "pca")
DimHeatmap(totipotency, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(totipotency, ndims=100)

UMAP resolution = 0.6 for totipotency

dim(totipotency)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
totipotency <- FindNeighbors(totipotency, dims = 1:50)
totipotency <- FindClusters(totipotency, resolution = 1)
totipotency <- RunUMAP(totipotency, dims = 1:50)
save(totipotency, file="Intermediate3.rda")
#load("Intermediate3.rda")

Original identity UMAP

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
p1

Clusters on UMAP

p2 <- DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)
p2

Quality control

DefaultAssay(totipotency) <- "RNA"
totipotency <- NormalizeData(totipotency)
totipotency <- FindVariableFeatures(totipotency, selection.method = "vst")

Quantifying cells in clusters

tbl_cell <- table(totipotency@meta.data$seurat_clusters, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Libraries per cluster")
Libraries per cluster
Cossec Dppa2_MERVL_TT EVEV Hu ML11 ShenNCS ShenPSCS ShenSnrpd2S ShenTBLCS Xu2CTPS XuEPS1 XuEPS2 XuEPSTPS XuESC1 XuESC2 YuC YuT
0 0 (0%) 1 (0%) 1 (0%) 10 (0.1%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 10 (0.2%) 6104 (76.9%) 14 (0.1%) 12 (0.1%) 6710 (64.9%) 2 (0%) 2 (0.1%) 1 (0%) 0 (0%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (0%) 7 (0.1%) 125 (1.2%) 290 (3.3%) 10 (0.1%) 8822 (93.6%) 13 (0.4%) 0 (0%) 1 (0%)
2 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 9 (0.2%) 39 (0.5%) 48 (0.5%) 8467 (95.3%) 20 (0.2%) 324 (3.4%) 6 (0.2%) 0 (0%) 0 (0%)
3 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (0%) 3 (0%) 8072 (80.2%) 76 (0.9%) 37 (0.4%) 98 (1%) 0 (0%) 1 (0%) 0 (0%)
4 0 (0%) 3 (0.1%) 0 (0%) 5694 (58.1%) 0 (0%) 0 (0%) 0 (0%) 5 (0.1%) 22 (0.5%) 32 (0.4%) 0 (0%) 0 (0%) 41 (0.4%) 0 (0%) 0 (0%) 1 (0%) 0 (0%)
5 0 (0%) 0 (0%) 0 (0%) 2 (0%) 0 (0%) 5111 (95.9%) 31 (0.7%) 244 (5.9%) 43 (0.9%) 1 (0%) 1 (0%) 3 (0%) 25 (0.2%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
6 0 (0%) 0 (0%) 0 (0%) 2 (0%) 0 (0%) 40 (0.8%) 4023 (97.2%) 17 (0.4%) 46 (1%) 3 (0%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
7 0 (0%) 0 (0%) 0 (0%) 14 (0.1%) 0 (0%) 13 (0.2%) 60 (1.4%) 29 (0.7%) 3929 (86.7%) 0 (0%) 4 (0%) 0 (0%) 15 (0.1%) 0 (0%) 1 (0%) 3 (0.1%) 1 (0%)
8 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 5 (0.1%) 1 (0%) 0 (0%) 7 (0.1%) 0 (0%) 1 (0%) 2 (0%) 19 (0.6%) 17 (0.7%) 3918 (94.2%)
9 0 (0%) 0 (0%) 0 (0%) 3913 (39.9%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (0%) 2 (0%) 1 (0%) 0 (0%) 18 (0.2%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
10 3 (0.1%) 0 (0%) 3 (0.1%) 0 (0%) 0 (0%) 8 (0.2%) 0 (0%) 3264 (78.6%) 16 (0.4%) 0 (0%) 15 (0.1%) 0 (0%) 2 (0%) 3 (0%) 0 (0%) 0 (0%) 0 (0%)
11 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (0%) 11 (0.1%) 3 (0%) 32 (0.4%) 6 (0.1%) 130 (1.4%) 2930 (98.1%) 0 (0%) 0 (0%)
12 0 (0%) 2 (0.1%) 2614 (97.3%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
13 0 (0%) 0 (0%) 0 (0%) 7 (0.1%) 0 (0%) 3 (0.1%) 9 (0.2%) 0 (0%) 13 (0.3%) 0 (0%) 1 (0%) 1 (0%) 4 (0%) 1 (0%) 7 (0.2%) 2367 (91.7%) 81 (1.9%)
14 0 (0%) 2361 (99.5%) 5 (0.2%) 1 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
15 0 (0%) 1 (0%) 0 (0%) 13 (0.1%) 0 (0%) 3 (0.1%) 0 (0%) 0 (0%) 9 (0.2%) 1232 (15.5%) 0 (0%) 0 (0%) 1087 (10.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
16 2213 (90%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
17 0 (0%) 0 (0%) 0 (0%) 5 (0.1%) 9 (2.8%) 0 (0%) 0 (0%) 0 (0%) 3 (0.1%) 356 (4.5%) 785 (7.8%) 2 (0%) 486 (4.7%) 44 (0.5%) 7 (0.2%) 0 (0%) 0 (0%)
18 0 (0%) 3 (0.1%) 5 (0.2%) 4 (0%) 1 (0.3%) 10 (0.2%) 0 (0%) 1 (0%) 2 (0%) 37 (0.5%) 45 (0.4%) 2 (0%) 1536 (14.9%) 0 (0%) 1 (0%) 0 (0%) 0 (0%)
19 0 (0%) 0 (0%) 0 (0%) 2 (0%) 0 (0%) 141 (2.6%) 13 (0.3%) 587 (14.1%) 150 (3.3%) 54 (0.7%) 0 (0%) 0 (0%) 214 (2.1%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
20 0 (0%) 0 (0%) 0 (0%) 2 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (0%) 32 (0.4%) 853 (8.5%) 0 (0%) 26 (0.3%) 2 (0%) 0 (0%) 0 (0%) 0 (0%)
21 0 (0%) 0 (0%) 0 (0%) 118 (1.2%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 2 (0%) 2 (0%) 1 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 183 (7.1%) 155 (3.7%)
22 0 (0%) 0 (0%) 0 (0%) 0 (0%) 306 (96.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
23 0 (0%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 1 (0%) 2 (0%) 0 (0%) 255 (5.6%) 5 (0.1%) 3 (0%) 1 (0%) 1 (0%) 0 (0%) 0 (0%) 2 (0.1%) 2 (0%)
24 0 (0%) 0 (0%) 0 (0%) 6 (0.1%) 1 (0.3%) 0 (0%) 0 (0%) 0 (0%) 17 (0.4%) 21 (0.3%) 83 (0.8%) 2 (0%) 95 (0.9%) 1 (0%) 0 (0%) 5 (0.2%) 0 (0%)
25 181 (7.4%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
26 0 (0%) 2 (0.1%) 59 (2.2%) 1 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (0%) 1 (0%) 0 (0%) 0 (0%) 0 (0%)
27 61 (2.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Quantifying embryo cells in new clusters

tbl_cell <- table(totipotency@meta.data$seurat_clusters, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Embryo cells per cluster")
Embryo cells per cluster
16cell 4cell 8cell BXC C57twocell early2cell earlyblast fibroblast late2cell lateblast mid2cell midblast zygote
0 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
1 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
2 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
3 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
4 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
5 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
6 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
7 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
8 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
9 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
10 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
11 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
12 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
13 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
14 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
15 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
16 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
17 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 9 (90%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
18 1 (1.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
19 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
20 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
21 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
22 57 (98.3%) 14 (100%) 47 (100%) 13 (100%) 8 (100%) 8 (100%) 43 (100%) 0 (0%) 10 (100%) 30 (100%) 12 (100%) 60 (100%) 4 (100%)
23 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
24 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (10%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
25 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
26 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
27 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Totipotency, pluripotency and PE markers in each cluster

###Plotting markers

features_vec <- c("3tbGFPPEST", "Arg2", "Nanog", "Gata6", "Sox17", "Zscan4d", "Tbra", "Eomes", "Cdx2", "Hand1", "Pou5f1")
fig_height <- length(features_vec)
VlnPlot(totipotency, features = features_vec, pt.size=0)
## Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
## following requested variables were not found: Tbra

Find markers for every new cluster 2

DefaultAssay(totipotency) <- "RNA"
mus.markers <- FindAllMarkers(totipotency, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(mus.markers, file = 'Aligned_markers.xlsx')
mus.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)

RNA slot of new object

totipotency <- ScaleData(totipotency, features = all.genes)
VlnPlot(totipotency, features = features_vec, pt.size=0)

#Which cells are pluripotent

FeaturePlot(totipotency, features = "Arg2")

FeaturePlot(totipotency, features = "Pou5f1")

FeaturePlot(totipotency, features = "Gata6")

FeaturePlot(totipotency, features = "Zscan4c")

FeaturePlot(totipotency, features = "Usp17lc")

#Heatmap

top10 <- mus.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(totipotency, features = top10$gene)

Original identity on new UMAP

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "orig.ident")
DimPlot(totipotency, reduction = "umap", label = TRUE, repel = TRUE)

p1

Cell types from in vivo data

p1 <- DimPlot(totipotency, reduction = "umap", group.by = "in.vivo", pt.size = 0.5)
p1

Final classification

new.cluster.ids <- c("Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent", "Pluripotent OCT4hi", "Pluripotent", "Splicing and rRNA", "Pluripotent OCT4hi", "Differentiating Primitive Endoderm", "Differentiating TE", "Pluripotent", "Pluripotent OCT4hi", "Pluripotent OCT4hi", "Totipotent", "Low totipotent", "Differentiated", "Fibroblast", "Ectoderm", "Primitive Endoderm", "Pluripotent", "Endoderm", "Embryo", "Migration", "Inflammation", "Mitotic", "Pluripotent", "Primitive Endoderm")
names(new.cluster.ids) <- levels(totipotency)
totipotency <- RenameIdents(totipotency, new.cluster.ids)
DimPlot(totipotency, reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()

Libraries per cluster

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
Cluster_distribution
Cossec Dppa2_MERVL_TT EVEV Hu ML11 ShenNCS ShenPSCS ShenSnrpd2S ShenTBLCS Xu2CTPS XuEPS1 XuEPS2 XuEPSTPS XuESC1 XuESC2 YuC YuT
Pluripotent 0 (0%) 6 (0.3%) 60 (2.2%) 5709 (58.3%) 0 (0%) 41 (0.8%) 4023 (97.2%) 22 (0.5%) 92 (2%) 6231 (78.5%) 9115 (90.6%) 8877 (99.9%) 6852 (66.3%) 9379 (99.5%) 2951 (98.8%) 3 (0.1%) 1 (0%)
Pluripotent OCT4hi 0 (0%) 2 (0.1%) 2614 (97.3%) 9 (0.1%) 0 (0%) 5114 (95.9%) 40 (1%) 249 (6%) 57 (1.3%) 2 (0%) 9 (0.1%) 5 (0.1%) 30 (0.3%) 3 (0%) 26 (0.9%) 2384 (92.4%) 3999 (96.2%)
Splicing and rRNA 0 (0%) 0 (0%) 0 (0%) 14 (0.1%) 0 (0%) 13 (0.2%) 60 (1.4%) 29 (0.7%) 3929 (86.7%) 0 (0%) 4 (0%) 0 (0%) 15 (0.1%) 0 (0%) 1 (0%) 3 (0.1%) 1 (0%)
Differentiating Primitive Endoderm 0 (0%) 0 (0%) 0 (0%) 3913 (39.9%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 2 (0%) 2 (0%) 1 (0%) 0 (0%) 18 (0.2%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Differentiating TE 3 (0.1%) 0 (0%) 3 (0.1%) 0 (0%) 0 (0%) 8 (0.2%) 0 (0%) 3264 (78.6%) 16 (0.4%) 0 (0%) 15 (0.1%) 0 (0%) 2 (0%) 3 (0%) 0 (0%) 0 (0%) 0 (0%)
Totipotent 0 (0%) 2361 (99.5%) 5 (0.2%) 1 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Low totipotent 0 (0%) 1 (0%) 0 (0%) 13 (0.1%) 0 (0%) 3 (0.1%) 0 (0%) 0 (0%) 9 (0.2%) 1232 (15.5%) 0 (0%) 0 (0%) 1087 (10.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Differentiated 2213 (90%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Fibroblast 0 (0%) 0 (0%) 0 (0%) 5 (0.1%) 9 (2.8%) 0 (0%) 0 (0%) 0 (0%) 3 (0.1%) 356 (4.5%) 785 (7.8%) 2 (0%) 486 (4.7%) 44 (0.5%) 7 (0.2%) 0 (0%) 0 (0%)
Ectoderm 0 (0%) 3 (0.1%) 5 (0.2%) 4 (0%) 1 (0.3%) 10 (0.2%) 0 (0%) 1 (0%) 2 (0%) 37 (0.5%) 45 (0.4%) 2 (0%) 1536 (14.9%) 0 (0%) 1 (0%) 0 (0%) 0 (0%)
Primitive Endoderm 61 (2.5%) 0 (0%) 0 (0%) 2 (0%) 0 (0%) 141 (2.6%) 13 (0.3%) 587 (14.1%) 150 (3.3%) 54 (0.7%) 0 (0%) 0 (0%) 214 (2.1%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Endoderm 0 (0%) 0 (0%) 0 (0%) 118 (1.2%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 2 (0%) 2 (0%) 1 (0%) 0 (0%) 1 (0%) 0 (0%) 0 (0%) 183 (7.1%) 155 (3.7%)
Embryo 0 (0%) 0 (0%) 0 (0%) 0 (0%) 306 (96.5%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Migration 0 (0%) 0 (0%) 0 (0%) 1 (0%) 0 (0%) 1 (0%) 2 (0%) 0 (0%) 255 (5.6%) 5 (0.1%) 3 (0%) 1 (0%) 1 (0%) 0 (0%) 0 (0%) 2 (0.1%) 2 (0%)
Inflammation 0 (0%) 0 (0%) 0 (0%) 6 (0.1%) 1 (0.3%) 0 (0%) 0 (0%) 0 (0%) 17 (0.4%) 21 (0.3%) 83 (0.8%) 2 (0%) 95 (0.9%) 1 (0%) 0 (0%) 5 (0.2%) 0 (0%)
Mitotic 181 (7.4%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Embryo cells per cluster

tbl_cell <- table(totipotency@active.ident, totipotency@meta.data$in.vivo)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add %>% gt(rowname_col="rownames") %>% tab_header(title = "Cluster_distribution")
Cluster_distribution
16cell 4cell 8cell BXC C57twocell early2cell earlyblast fibroblast late2cell lateblast mid2cell midblast zygote
Pluripotent 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Pluripotent OCT4hi 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Splicing and rRNA 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Differentiating Primitive Endoderm 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Differentiating TE 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Totipotent 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Low totipotent 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Differentiated 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Fibroblast 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 9 (90%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Ectoderm 1 (1.7%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Primitive Endoderm 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Endoderm 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Embryo 57 (98.3%) 14 (100%) 47 (100%) 13 (100%) 8 (100%) 8 (100%) 43 (100%) 0 (0%) 10 (100%) 30 (100%) 12 (100%) 60 (100%) 4 (100%)
Migration 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Inflammation 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (10%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Mitotic 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%) 0 (0%)

Split plot per library

DimPlot(totipotency, reduction = "umap", split.by = "orig.ident", ncol = 3, pt.size = 0.3)

Save data as Run3_final3

save(totipotency, file="Aligned_final.rda")
#load("Aligned.rda")

Plotting cell cycle on new UMAP

DefaultAssay(totipotency) <- "RNA"

g2m_1.genes <-  c("Hmgb2","Cdk1","Nusap1","Ube2c","Birc5","Tpx2","Top2a","Ndc80","Cks2","Nuf2","Cks1b", "Mki67",   "Tmpo","Cenpf","Tacc3","Fam64a","Smc4","Ccnb2","Ckap2l","Ckap2","Aurkb","Bub1","Kif11","Anp32e","Tubb4b","Gtse1","Kif20b","Hjurp","Cdca3","Hn1","Cdc20","Ttk","Cdc25c","Kif2c","Rangap1","Ncapd2","Dlgap5","Cdca2","Cdca8","Ect2","Kif23","Hmmr","Aurka","Psrc1","Anln","Lbr","Ckap5","Cenpe","Ctcf","Nek2","G2e3","Gas2l3","Cbx5","Cenpa") 

s_1.genes <-  c("Mcm5","Pcna","Tyms","Fen1","Mcm2","Mcm4","Rrm1","Ung","Gins2","Mcm6","Cdca7","Dtl","Prim1","Uhrf1","Mlf1ip","Hells","Rfc2","Rpa2","Nasp","Rad51ap1","Gmnn","Wdr76","Slbp","Ccne2","Ubr7","Pold3","Msh2","Atad2","Rad51","Rrm2","Cdc45","Cdc6","Exo1","Tipin","Dscc1","Blm","Casp8ap2","Usp1","Clspn","Pola1","Chaf1b","Brip1","E2f8")   

totipotency <- CellCycleScoring(totipotency, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)
DimPlot(totipotency, reduction = "umap")

#Cell cycle bar chart

totipotency@meta.data$Phase <- factor(totipotency@meta.data$Phase, levels = c("G1", "S", "G2M"))
totipotency@meta.data %>% ggplot(aes(x=old.ident,fill=Phase)) +
  geom_bar(position = "fill",size = 3, width = .8) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 11)) +
  labs(x = "", y = "Cell fraction")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

#Plotting DMTT

Idents(object = totipotency) <- "orig.ident"
highlight <- WhichCells(totipotency, ident = "Dppa2_MERVL_TT")
DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))

#Plotting Hu

Idents(object = totipotency) <- "orig.ident"
highlight <- WhichCells(totipotency, ident = "Hu")
DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))

#Plotting XuEPSTPS

highlight <- WhichCells(totipotency, ident = "XuEPSTPS")
DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))

#Plotting Xu2CTPS

highlight <- WhichCells(totipotency, ident = "Xu2CTPS")
DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))

#Plotting Cossec

highlight <- WhichCells(totipotency, ident = "Cossec")
DimPlot(totipotency, reduction = "umap", cells.highlight = highlight, pt.size = 1, cols.highlight = c("darkgreen"))

#Subsetting on totipotency

tot <- subset(x = totipotency, Zscan4c > 1)
DimPlot(tot, reduction = "umap")

save(tot, file="Zscan4.rda")
#load(file='Zscan4.rda')  

Run the standard workflow for visualisation and clustering

Identify variable genes in totipotent cluster

tot <- FindVariableFeatures(tot, selection.method = "vst", nfeatures = 3000)
top10 <- head(VariableFeatures(tot), 10)
plot1 <- VariableFeaturePlot(tot)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE, xnudge=0, ynudge=0)
plot2

Scaling

all.genes <- rownames(tot)
tot <- ScaleData(tot, features = all.genes)

PCA

tot <- RunPCA(tot, npcs=100, features = rownames(tot))

print(tot[["pca"]], dims = 1:6, nfeatures = 5)
VizDimLoadings(tot, dims = 1:6, reduction = "pca")
DimPlot(tot, reduction = "pca")
DimHeatmap(tot, dims = 1:6, cells = 500, balanced = TRUE)
ElbowPlot(tot, ndims=100)

UMAP resolution = 0.6 for totipotency

dim(tot)
#0.4-1.2 for resolution for 3000 cells, according to seurat tutorial
tot <- FindNeighbors(tot, dims = 1:50)
tot <- FindClusters(tot, resolution = 0.2)
tot <- RunUMAP(tot, dims = 1:50)
DimPlot(tot, reduction = "umap", label = TRUE, repel = TRUE)

Original identity UMAP

p1 <- DimPlot(tot, reduction = "umap", group.by = "orig.ident")
p1

## Normalise and find variable features

DefaultAssay(tot) <- "RNA"
tot <- NormalizeData(tot)
tot <- FindVariableFeatures(tot, selection.method = "vst")

Quantify cells in new clusters

tbl_cell <- table(tot@active.ident, tot@meta.data$orig.ident)
tbl_add <- as.data.frame(matrix(paste0(tbl_cell, " (", round(sweep(tbl_cell, 2, colSums(tbl_cell), FUN="/"), 3) * 100, "%)"), ncol=ncol(tbl_cell)))
names(tbl_add) <- colnames(tbl_cell)
tbl_add$rownames <- rownames(tbl_cell)
tbl_add 
##       Cossec Dppa2_MERVL_TT     EVEV           Hu     ML11   ShenNCS ShenPSCS
## 1     0 (0%)         0 (0%)   0 (0%) 3130 (57.5%)   0 (0%)    0 (0%)   0 (0%)
## 2     0 (0%)         0 (0%)   0 (0%) 2314 (42.5%)   0 (0%)    0 (0%)   0 (0%)
## 3     0 (0%)    1915 (100%) 1 (100%)       0 (0%) 6 (100%)    0 (0%)   0 (0%)
## 4     0 (0%)         0 (0%)   0 (0%)       0 (0%)   0 (0%) 2 (22.2%)   0 (0%)
## 5     0 (0%)         0 (0%)   0 (0%)       0 (0%)   0 (0%) 7 (77.8%) 7 (100%)
## 6     0 (0%)         0 (0%)   0 (0%)       0 (0%)   0 (0%)    0 (0%)   0 (0%)
## 7 219 (100%)         0 (0%)   0 (0%)       0 (0%)   0 (0%)    0 (0%)   0 (0%)
## 8     0 (0%)         0 (0%)   0 (0%)       0 (0%)   0 (0%)    0 (0%)   0 (0%)
## 9     0 (0%)         0 (0%)   0 (0%)       0 (0%)   0 (0%)    0 (0%)   0 (0%)
##   ShenSnrpd2S  ShenTBLCS     Xu2CTPS    XuEPSTPS      YuC       YuT rownames
## 1      0 (0%)     0 (0%)      0 (0%)      0 (0%)   0 (0%)    0 (0%)        0
## 2      0 (0%)     0 (0%)      0 (0%)    1 (0.2%)   0 (0%)    0 (0%)        1
## 3      0 (0%)     0 (0%)      0 (0%)      0 (0%)   0 (0%)    0 (0%)        2
## 4      0 (0%)     0 (0%)    5 (1.4%) 656 (99.8%)   0 (0%)    0 (0%)        3
## 5    3 (1.5%) 458 (100%)      0 (0%)      0 (0%)   0 (0%)    0 (0%)        4
## 6      0 (0%)     0 (0%) 345 (98.6%)      0 (0%)   0 (0%)    0 (0%)        5
## 7      0 (0%)     0 (0%)      0 (0%)      0 (0%)   0 (0%)    0 (0%)        6
## 8 199 (98.5%)     0 (0%)      0 (0%)      0 (0%)   0 (0%)    0 (0%)        7
## 9      0 (0%)     0 (0%)      0 (0%)      0 (0%) 5 (100%) 36 (100%)        8

Scale data

Find markers for every new cluster

DefaultAssay(tot) <- "RNA"
tot.markers <- FindAllMarkers(tot, only.pos = TRUE, min.pct =0.1, logfc.threshold = 0.25)
write.xlsx(tot.markers, file = 'Zscan4_markers.xlsx')
tot.markers %>% group_by(cluster) %>% top_n(n = 4, wt = avg_log2FC)

#Heatmap

top10 <- tot.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
DoHeatmap(tot, features = top10$gene) + NoLegend() + theme(axis.text.y = element_text(size = 4.5))

Original identity on new UMAP

p1 <- DimPlot(tot, reduction = "umap", group.by = "orig.ident")
DimPlot(tot, reduction = "umap", label = TRUE, repel = TRUE)

p1

Cell types from in vivo data

p1 <- DimPlot(tot, reduction = "umap", group.by = "in.vivo",pt.size = 1)
p1

Final classification 2

new.cluster.ids <- c("Hu", "Hu", "DM_TT and 2C embryo", "Xu EPS TPS", "Shen TBLCs", "Xu 2C TPS", "Cossec", "Shen Snrpd2", "Yu")
names(new.cluster.ids) <- levels(tot)
tot <- RenameIdents(tot, new.cluster.ids)
DimPlot(tot, reduction = "umap", label = TRUE, pt.size = 0.5, repel = TRUE) + NoLegend()

Save data as Tot_reclustered

save(tot, file="Zscan4_reclustered.rda")

Assigning cell cycle score

tot <- CellCycleScoring(tot, s.features = s_1.genes, g2m.features = g2m_1.genes, set.ident = TRUE)

Plotting cell cycle score on UMAP

DimPlot(tot, reduction = "umap")

#Cell cycle bar chart

tot@meta.data$Phase <- factor(tot@meta.data$Phase, levels = c("G1", "S", "G2M"))
tot@meta.data %>% ggplot(aes(x=old.ident,fill=Phase)) +
  geom_bar(position = "fill",size = 3, width = .8) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 11)) +
  labs(x = "", y = "Cell fraction")